library(tidyverse)
library(Biostrings)
library(DECIPHER)
library(ggseqlogo)
library(htmltools)
library(plotly)
# Load the amino-acid sequences from "seqdump.txt" (FASTA is the default
# format of readAAStringSet()). The function already returns an AAStringSet,
# so the result is used directly without a second AAStringSet() coercion.
petases <- readAAStringSet("seqdump.txt")

# Build the multiple sequence alignment with DECIPHER. Default settings are
# kept, so progress is reported on the console while the alignment runs.
aligned_petases <- AlignSeqs(petases)
## Determining distance matrix based on shared 5-mers:
## ================================================================================
## 
## Time difference of 0 secs
## 
## Clustering into groups by similarity:
## ================================================================================
## 
## Time difference of 0.06 secs
## 
## Aligning Sequences:
## ================================================================================
## 
## Time difference of 0.12 secs
## 
## Iteration 1 of 2:
## 
## Determining distance matrix based on alignment:
## ================================================================================
## 
## Time difference of 0 secs
## 
## Reclustering into groups by similarity:
## ================================================================================
## 
## Time difference of 0 secs
## 
## Realigning Sequences:
## ================================================================================
## 
## Time difference of 0.11 secs
## 
## Iteration 2 of 2:
## 
## Determining distance matrix based on alignment:
## ================================================================================
## 
## Time difference of 0 secs
## 
## Reclustering into groups by similarity:
## ================================================================================
## 
## Time difference of 0 secs
## 
## Realigning Sequences:
## ================================================================================
## 
## Time difference of 0.01 secs
# Write the alignment to an HTML file without opening a browser
# (openURL = FALSE), then read that same file back as HTML markup; the
# top-level value is auto-printed so the alignment is shown inline.
alignment_html <- "output.html"
BrowseSeqs(
  myXStringSet = aligned_petases,
  htmlFile = alignment_html,
  openURL = FALSE
)
htmltools::includeHTML(path = alignment_html)
                                                                                                       20                  40                  60                  80                 100                 120                 140                 160                 180                 200                 220                 240                 260                 280                 300        
                                                                                     '''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|''        
    6ILX_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    --------------------------MQTNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGFSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS--HHHHHH--------------    270    
    5XFY_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    -----------------------------NPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWAMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS----------------------    261    
    5XJH_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEAELAAATAEQ    300    
    5XFZ_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ----------------------------MNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALGQVASLNGTSSSPIYGKVDTARMGVMGWAMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS----------------------    262    
    5XH3_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    -----------------------------NPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALGQVASLNGTSSSPIYGKVDTARMGVMGWAMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS----------------------    261    
    6KUO_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSDQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEAELAAATAEQ    300    
    5YNS_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    -----------------------------GSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTAVSDFRTANCSLED-------------------    264    
    6IJ5_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAAQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEARLAAATAEQ    300    
    5YFE_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    --------------------------AQTNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPNNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSANAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTNNCSLEHHHHHH--------------    272    
    6IJ4_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPESRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEARLAAATAEQ    300    
    6KUQ_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQDLIGKKGVAWMKRFMDNDTRYSTFACENPNSTKVSDFRTANCSLEDPAANKARKEAELAAATAEQ    300    
    6IJ3_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPDSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEARLAAATAEQ    300    
    6IJ6_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPESRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTAVSDFRTANCSLEDPAANKARKEARLAAATAEQ    300    
    6KUS_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis]    ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPESRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANTGNSDQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEAELAAATAEQ    300    
                             6KY5_A Chain A, PET hydrolase [Ideonella sakaiensis]    MNFPRASRLMQAAVLGGLMAVSAAATAQTNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTFDYPSSRSSQQMAALRQVASLNGDSSSPIYGKVDTARMGVMGHSMGGGASLRSAANNPSLKAAIPQAPWDSQTNFSSVTVPTLIFACENDSIAPVNSHALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTAVSDFRTANCSLEHHHHHH--------------    298    
                                                                                             
                                                                        Consensus    MNFPRASRLMQAXXXXXXXXXXXXXXXXXXXXXRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPXNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTXDXPXSRSSQQMAALXQVASLNGXSSSPIYGKVDTARMGVMGXXMGGGXSLXSAANNPSLKAAXXQAPWXSXTNFSSVTVPTLIFACENDSIAPVNSXALPIYDSMSXNAKQFLEINGGSHSCANXGNSBQXLIGKKGVAWMKRFMDNDTRYSTFACENPNSTXVSDFRTXNCS++XXXXXXARKEAXLAAATAEQ    312